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EXHIBIT A 



#!/internet/bin/perl5.002 -w 


# Copyright (c) 1998 

# Eugene Wang 


# *** BEGIN *** 


# 


#input sequence (File 0) to compare 


# 


# Read the enzyme parameters from the file named by $ARGV[0].
#
# The file is consumed as six consecutive lines, in this order:
#   1. enzyme 1 recognition sequence  -> $Elsequence (upper-cased)
#   2. enzyme 1 numeric offset        -> $ElExtLoc
#   3. enzyme 2 recognition sequence  -> $E2sequence (reversed, upper-cased)
#   4. enzyme 2 numeric offset        -> $E2ExtLoc
#   5. lower segment-size bound       -> $ElSizeStart
#   6. upper segment-size bound       -> $ElSizeEnd
#
# Derived values: $lenElTotal / $lenE2Total (sequence length + offset) and
# $lenElExtra (difference between the two offsets).
#
# NOTE(review): this listing was recovered from a scan.  The "El" in the
# variable names is presumably "E1" (enzyme 1); the scanned spelling is kept
# because the rest of the file refers to these names.
# NOTE(review): the offsets look like cut-site distances past the
# recognition site -- confirm against the enzyme file format.
if ($#ARGV < 2) {die "argv < 2";}

open (EnzymeInput, $ARGV[0]) || die "Cannot open input file $ARGV[0]";

#print "Input Enzyme 1 sequence = ";
$Elsequence = <EnzymeInput>;
chomp $Elsequence;
$lenElSeq = length ($Elsequence);
$Elsequence =~ tr/a-z/A-Z/;

$ElExtLoc = <EnzymeInput>;
chomp ($ElExtLoc);

$lenElTotal = $lenElSeq + $ElExtLoc;

#print "Input Enzyme 2 sequence = ";
$E2sequence = <EnzymeInput>;
chomp $E2sequence;

# Enzyme 2 is stored reversed (scalar-context reverse flips the string).
$E2sequence = reverse ($E2sequence);
$lenE2Seq = length ($E2sequence);
$E2sequence =~ tr/a-z/A-Z/;

$E2ExtLoc = <EnzymeInput>;
chomp ($E2ExtLoc);

$lenE2Total = $lenE2Seq + $E2ExtLoc;

$lenElExtra = $E2ExtLoc - $ElExtLoc;

$ElSizeStart = <EnzymeInput>;
chomp ($ElSizeStart);
$ElSizeEnd = <EnzymeInput>;
chomp ($ElSizeEnd);


# 


#open input FASTA file (File 1) 


#print "Input file name = "; 
#$fname = <>; 
#chomp $fname; 

#$fname = "H_DJ0167F23 . seq" ; 

# Open the FASTA input ($ARGV[1]) and the output file ($ARGV[2]), copy the
# FASTA header line to the output unchanged, and reset the sequence
# accumulators.  The bareword handles Infile/Outfile are kept because later
# parts of the script read and print through them.
open (Infile, $ARGV[1]) || die "Cannot open input file $ARGV[1]";
#
#open output file (File 2)
#
open (Outfile, ">$ARGV[2]") || die "Cannot open output file $ARGV[2]";
#open (Outfile, ">output.txt");
#print Outfile "Qualifier\tSequence";

#
#read input FASTA file
#
$line = <Infile>;   #header line

print Outfile "$line";
$linecount = 0;
$FullSeq = "";

#
#check headerline format
#
# Derive $FragmentID from the header line held in $line:
#   * if the header splits into more than three '|'-separated fields, the
#     fourth field is used;
#   * otherwise the header is split on single spaces and the first token
#     (with its leading '>' removed) is used;
#   * if nothing is left, the ID falls back to "UnknownFragment".
#
# NOTE(review): both regexes below were garbled in the scanned listing
# ("split (A I /" and "s/ A > />/").  They are reconstructed as /\|/ and
# s/^> />/ -- confirm against an original copy if one is available.
chomp $line;

@fields = split (/\|/, $line);
$ntokens = @fields;     # array in scalar context = number of fields

if ($ntokens > 3)
{
    $FragmentID = $fields[3];
}
else
{
    # Drop a blank between '>' and the name so the name is the first token.
    $line =~ s/^> />/;

    @fields = split (/ /, $line);
    $ntokens = @fields;

    if ($ntokens > 0)
    {
        $FragmentID = $fields[0];
        $FragmentID =~ s/^>//;
    }
    else
    {
        $FragmentID = "UnknownFragment";
    }
}

while ($line = <Infile>) #read in a line 

{ 


# 0 


# Write a human-readable description of both enzymes and the requested
# segment-size window to Outfile, then reset the state used by the scan
# that follows.
#
# NOTE(review): the string literals were damaged in the scanned listing
# ("5V-" for "5\'-", stray blanks around "(N)", an unterminated quote).
# The spacing inside them is a reconstruction -- confirm against known-good
# output if exact formatting matters.
print Outfile "Enzyme top strand: ";
print Outfile "(5\'-$Elsequence";
if ($ElExtLoc>0) {print Outfile "(N)$ElExtLoc";}
print Outfile "-3\')";
print Outfile "\n";

print Outfile "Enzyme bottom strand: ";
print Outfile "(5\'-";
if ($E2ExtLoc>0) {print Outfile "(N)$E2ExtLoc";}
print Outfile "$E2sequence-3\')";

print Outfile " or ";

# $E2sequence is held reversed; flip it back to show the 3'->5' reading.
my $ts = reverse ($E2sequence);

print Outfile "(3\'-$ts";
if ($E2ExtLoc>0) {print Outfile "(N)$E2ExtLoc";}
print Outfile "-5\')";
print Outfile "\n";

print Outfile "Segment size: $ElSizeStart - $ElSizeEnd\n";

# Shorter / longer of the two total enzyme footprints.
$minLen = $lenElTotal < $lenE2Total ? $lenElTotal : $lenE2Total;
$maxLen = $lenElTotal > $lenE2Total ? $lenElTotal : $lenE2Total;

# Scan state: match counter, selection counter, and parallel arrays holding
# the left/right boundary and enzyme type (1 or 2) of every segment.
$nMatchEl = 0;
$nSelected = 0;
@EnzLocLeft = ();
@EnzLocRight = ();
@EnzTypeLeft = ();
@EnzTypeRight = ();

if ($minLen > 0) 
{ 

# for ($i=0; $i <= $lenFullSeq-$lenElSeq; $i++) 

# Slide over $FullSeq one position at a time looking for either enzyme.
#
# Every hit opens a new segment (its left boundary and enzyme type are
# pushed onto @EnzLocLeft / @EnzTypeLeft).  Every hit after the first also
# closes the previous segment by pushing onto @EnzLocRight / @EnzTypeRight.
# The last segment is therefore still open when the loop ends.
#   enzyme 1 hit at $i: left = $i + $lenElTotal, previous right = left - 1
#   enzyme 2 hit at $i: left = $i,               previous right = $i - 1
# Enzyme 2 is compared at $i + $E2ExtLoc, and only when enzyme 1 did not
# match at $i.
for ($i=0; $i <= $lenFullSeq-$maxLen; $i++)
{
    if (substr ($FullSeq, $i, $lenElSeq) eq $Elsequence)
    {
#       $EnzLocLeft[$nMatchEl] = $i + $lenElTotal;
        ##have to use push()
#       $EnzTypeLeft[$nMatchEl] = 1;

        push (@EnzLocLeft, $i + $lenElTotal);
        push (@EnzTypeLeft, 1);

#       print Outfile "$nMatchEl\t$i\t";
#       print Outfile "type 1\t";
#       print Outfile "$Elsequence\t";
#       print Outfile substr ($FullSeq, $i, $lenElTotal);
#       print Outfile "\n";

        if ($nMatchEl > 0)
        {
            push (@EnzLocRight, $i + $lenElTotal - 1);
            push (@EnzTypeRight, 1);
        }

        $nMatchEl++;
    }
#   if (substr ($FullSeq,$i+$E2ExtLoc,$lenE2Seq) eq $E2sequence)
    elsif (substr ($FullSeq, $i+$E2ExtLoc, $lenE2Seq) eq $E2sequence)
    {
#       $EnzLocLeft[$nMatchEl] = $i;
#       $EnzCutLeft[$nMatchEl] = 2;

        push (@EnzLocLeft, $i);
        push (@EnzTypeLeft, 2);

#       print Outfile "$nMatchEl\t$i\t";
#       print Outfile "type 2\t";
#       print Outfile "$E2sequence\t";
#       print Outfile substr ($FullSeq, $i, $lenE2Total);
#       print Outfile "\n";

        if ($nMatchEl > 0)
        {
            push (@EnzLocRight, $i - 1);
            push (@EnzTypeRight, 2);
        }

        $nMatchEl++;
    }
}

# Close the segment left open by the scan loop, check that the bookkeeping
# is consistent, and list the left boundary of every segment.
#
# $i still holds the value at which the scan loop stopped, so $i-1 is the
# last position that was examined.
if ($nMatchEl > 0)
{
    push (@EnzLocRight, $i - 1);
    push (@EnzTypeRight, 2);
}

print Outfile "Number of segments: $nMatchEl\n";

# There must be exactly one right boundary per match ($#array + 1 = count).
if ($nMatchEl != ($#EnzLocRight + 1))
{
    die ("Counting error...nMatchEl ($nMatchEl) != $#EnzLocRight");
}

print Outfile "Matched loci:\n";

for ($i=0; $i < $nMatchEl; $i++)
{
    print Outfile "$EnzLocLeft[$i]\t";
}

print Outfile "\nSegment Size:\n"; 
for ($i=0; $i < $nMatchEl-l; $i++) 
{ 

$tmpSegSize = $EnzLocRight [ $i] - $EnzLocLef t [$i] + 1; 
if ($tmpSegSize >= $ElSizeStart && $tmpSegSize <= 

$ElSizeEnd) 

{ 

$SegSelected[$nSelected++] = $i; 
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print Outfile n $tmpSegSize\t " ; 


## 


## 
## 


## print out the Segment (El) sequences


print Outfile "\nSegments Selected ( $nSelected) : 
for ($i=0; $i < $nSelected; $i+ + ) 
{ 

$selSeq = $SegSelected [ $i ] ; 
$Elleft = $EnzLocLeft [$selSeq] ; 
$Elright = $EnzLocRight [ $selSeq] ; 


if ($lenElExtra > 0) {$Elright += $lenElExtra; } 
else {SElleft += $lenElExtra; } 
$lenSelSeq = $Elright - $Elleft + 1; 

$OutputHeaderLine = ">" . $FragmentID . "_ M .$selSeq . "\tsize=" . 


$lenSelSeq; 

$OutputHeaderLine .= M \tLoci=" . $Elleft . "-" . $Elright; 
$OutputHeaderLine . = " \tEnz$EnzTypeLef t [ $selSeq] - 
Enz$EnzTypeRight [$selSeq] " ; 


$SeqEltoNextEl = substr ($FullSeq, $Ellef t , $lenSelSeq) ; 
print Outfile H \n$SeqEltoNextEl\n" ; 
print "\n$SeqEltoNextEl\n"; 


print Outfile "\n$OutputHeaderLine" ; 
print "$OutputHeaderLine" ; 


# 


# Segment sequence


return ($lenFullSeq) ; 
} 


EXHIBIT B 


#!/internet/bin/perl5.002 -w 

#****************************************************************

# Copyright (c) 1998 

# Author: Eugene Wang 

# Title: Ligate 

# Purpose: Find matching segments /sequences in two files 
#****************************************************************

# Main script: copy to the output file every FASTA record whose sequence
# carries the ligation adapter at both ends (as decided by Ligate()).
#
# Arguments:
#   $ARGV[0]  adapter file: line 1 = offset ($locLigate),
#                           line 2 = adapter sequence ($seqLigate)
#   $ARGV[1]  FASTA input file
#   $ARGV[2]  output file (overwritten)
#
# Package variables are used on purpose: the file runs without "use strict"
# and Ligate() may localize variables of the same names.
if ($#ARGV != 2) {die "Number of argv (" . scalar(@ARGV) . ") != 3";}

#
#input file
#
open (InfileLigate, $ARGV[0]) or die "Open error ... $ARGV[0]\n";

$locLigate = <InfileLigate>;
chomp $locLigate;
$seqLigate = <InfileLigate>;
chomp $seqLigate;

close (InfileLigate);

#
#output file
#
open (Infile, $ARGV[1]) or die "Open error ... $ARGV[1]\n";
$OutName = $ARGV[2];

open (Outfile, ">$OutName") or die ("Open error ... $OutName");

$alreadyReadOne = 0;    # becomes 1 once the first header line has been seen
$sequence = "";         # sequence lines of the current record, concatenated

while ($line = <Infile>)    #read in a line
{
    chomp $line;

    next if ($line eq "");

    # NOTE(review): the scanned listing tested two patterns joined by "||",
    # but the first pattern and part of the trailing comment were lost.
    # Only the surviving "first char is '>'" test is reproduced here.
    if ($line =~ /^>/)      ##if first char is a '>'
    {
        # A new header ends the previous record: emit it if it qualifies.
        if ($alreadyReadOne == 1) {
            if (Ligate ($sequence, $locLigate, $seqLigate) == 1) {
                print Outfile "$headerLine\n";
                print Outfile "$sequence\n";
            }

            $sequence = "";
        }

        $headerLine = $line;
        $alreadyReadOne = 1;
    }
    else
    {
        $sequence .= $line;
    }
}

# The final record has no following header to trigger its output.
if ($alreadyReadOne == 1) {
    if (Ligate ($sequence, $locLigate, $seqLigate) == 1) {
        print Outfile "$headerLine\n";
        print Outfile "$sequence\n";
    }
}

close (Infile);
# Buffered write errors only surface at close, so check it.
close (Outfile) or die "Close error ... $OutName\n";


#####################################################################################
#compare sequence with Ligation Adapter sequence
#####################################################################################
# Ligate($seq, $locLigate, $seqLigate)
#
# Report whether $seq carries the adapter $seqLigate at both ends.
#
#   $seq        sequence to test
#   $locLigate  number of characters between each end of $seq and the adapter
#   $seqLigate  adapter sequence; compared with "eq", so case-sensitive
#
# Returns 1 when the adapter is found $locLigate characters in from the
# start and also ends $locLigate characters before the end; otherwise 0.
sub Ligate
{
    my ($seq, $locLigate, $seqLigate) = @_;

    my $lenLigate = length ($seqLigate);
    my $lenSeq = length ($seq);

    # A sequence too short to hold offset + adapter can never match.  Bail
    # out early so substr() is not asked for a position outside the string.
    return 0 if ($lenSeq < $locLigate + $lenLigate);

    if ((substr ($seq, $locLigate, $lenLigate) eq $seqLigate) &&
        (substr ($seq, $lenSeq - $locLigate - $lenLigate, $lenLigate) eq $seqLigate)) {
        return 1;
    }

    return 0;
}


